Getting Started with mongolstats

Installation

Install from GitHub:

# install.packages("devtools")
# devtools::install_github("temuulene/mongolstats")

Regional Comparison

Compare infant mortality across different regions:

# Collect the codes of every monthly period whose English label mentions 2024.
# The annual figure computed below is the mean of these monthly rates.
# NOTE(review): `months` is not defined in this section -- presumably the
# table's Month dimension values (with `code` and `label_en` columns) fetched
# earlier; confirm.
months_2024 <- months |>
    filter(grepl("2024", label_en, fixed = TRUE)) |> # literal match, not regex
    pull(code)

# Infant mortality table; named once so the metadata lookup and the data
# request cannot drift apart.
imr_table_id <- "DT_NSO_2100_015V1"

# Fetch monthly IMR for every Region code in 2024, then reduce to one
# annual-average row per area.
imr_regional <- nso_data(
    tbl_id = imr_table_id,
    selections = list(
        "Region" = nso_dim_values(imr_table_id, "Region")$code,
        "Month" = months_2024
    ),
    labels = "en"
) |>
    filter(nchar(Region) == 3) |> # Keep only Aimags and Ulaanbaatar (code length = 3)
    mutate(
        # Strip padding so the names can be used as join keys
        Region_en = trimws(Region_en),
        # Standardize region names to match geographic boundary data
        Region_en = dplyr::case_match(
            Region_en,
            "Bayan-Ulgii" ~ "Bayan-Ölgii",
            "Uvurkhangai" ~ "Övörkhangai",
            "Khuvsgul" ~ "Hovsgel",
            "Umnugovi" ~ "Ömnögovi",
            "Tuv" ~ "Töv",
            "Sukhbaatar" ~ "Sükhbaatar",
            .default = Region_en
        ),
        # NOTE(review): the filter above keeps only 3-character codes, so the
        # one-character codes "1"-"4" can never match here and every row is
        # labelled "Aimag". Confirm whether regional aggregates were meant to
        # be kept (and the filter relaxed) or whether this column is vestigial.
        Type = ifelse(Region %in% c("1", "2", "3", "4"), "Region", "Aimag")
    ) |>
    # Calculate annual average IMR from monthly data
    group_by(Region_en, Type) |>
    summarise(value = mean(value, na.rm = TRUE), .groups = "drop")

# Ten largest annual-average IMR values, highest first
imr_regional |>
    select(Region_en, value) |>
    arrange(desc(value)) |>
    head(n = 10)
#> # A tibble: 10 × 2
#>    Region_en    value
#>    <chr>        <dbl>
#>  1 Hovsgel       27.2
#>  2 Arkhangai     24.8
#>  3 Övörkhangai   23.9
#>  4 Bayankhongor  21.6
#>  5 Ömnögovi      19.9
#>  6 Uvs           19.8
#>  7 Sükhbaatar    17.9
#>  8 Bayan-Ölgii   17.7
#>  9 Zavkhan       17.5
#> 10 Khovd         16.8

Visualize Regional Disparities

# Mean IMR across aimags, shared by the colour-scale midpoint and the dashed
# reference line. na.rm = TRUE keeps it defined when an aimag has no usable
# value; such rows are dropped from the plot itself by the filter below.
aimag_mean_imr <- mean(
    imr_regional$value[imr_regional$Type == "Aimag"],
    na.rm = TRUE
)

# Horizontal bar chart of annual-average IMR, ordered by value, with a hover
# tooltip built from the `text` aesthetic.
p <- imr_regional |>
    filter(!is.na(value)) |>
    arrange(desc(value)) |>
    mutate(Region_en = forcats::fct_reorder(Region_en, value)) |>
    ggplot(aes(x = value, y = Region_en,
               text = paste0("<b>Region:</b> ", Region_en, "<br>",
                             "<b>IMR:</b> ", round(value, 1)))) +
    # Aimags with gradient
    geom_col(data = ~ subset(., Type == "Aimag"), aes(fill = value), width = 0.7) +
    # Regions with distinct color
    geom_col(data = ~ subset(., Type == "Region"), fill = "#2c3e50", width = 0.7) +
    # Value labels just past the end of each bar
    geom_text(aes(label = round(value, 1)), hjust = -0.2, color = "grey30", size = 3.5) +
    # Diverging palette centred on the aimag mean
    scale_fill_gradient2(
        low = "#27ae60",
        mid = "#f39c12",
        high = "#e74c3c",
        midpoint = aimag_mean_imr
    ) +
    # Dashed reference line at the aimag mean
    geom_vline(
        xintercept = aimag_mean_imr,
        linetype = "dashed",
        color = "grey50",
        linewidth = 0.5
    ) +
    # Extra room on the right so the value labels are not clipped
    scale_x_continuous(expand = expansion(mult = c(0, 0.1))) +
    labs(
        title = "Infant Mortality by Aimag (2024 Average)",
        subtitle = "Dark bars represent Regional Averages",
        x = "Deaths per 1,000 live births",
        y = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold", size = 14),
        panel.grid.major.y = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.y = element_text(color = "black")
    )

# Convert to an interactive widget; show only the custom tooltip text
plotly::ggplotly(p, tooltip = "text")

Adding Geographic Context

Combine with mapping for spatial analysis:

library(sf)

# Load the ADM1 (aimag) boundary geometries
aimags <- mn_boundaries(level = "ADM1")

# Attach the IMR figures to each boundary, matching shapeName to Region_en;
# boundaries without a matching name keep NA in the joined columns
imr_map <- left_join(
    aimags,
    imr_regional,
    by = c("shapeName" = "Region_en")
)

# Create choropleth: fill each boundary by its annual-average IMR and expose
# a hover tooltip through the `text` aesthetic.
p <- imr_map |>
    ggplot() +
    # Border width is set with `linewidth` (as elsewhere in this file); in
    # ggplot2 >= 3.4 `size` only affects point geometries in geom_sf().
    geom_sf(aes(fill = value,
                text = paste0("<b>Region:</b> ", shapeName, "<br>",
                              "<b>IMR:</b> ", round(value, 1))),
            color = "white", linewidth = 0.2) +
    # Reversed magma palette: direction = -1 flips the default colour order
    scale_fill_viridis_c(
        option = "magma",
        direction = -1,
        name = "IMR\n(per 1,000)",
        labels = scales::label_number()
    ) +
    labs(
        title = "Infant Mortality Geography (2024 Average)",
        subtitle = "Spatial distribution of mortality rates",
        caption = "Source: NSO Mongolia"
    ) +
    theme_void() +
    theme(
        plot.title = element_text(face = "bold", size = 16),
        plot.subtitle = element_text(color = "grey40"),
        legend.position = "right",
        legend.title = element_text(size = 10, face = "bold")
    )

# Convert to an interactive widget; hover triggers on the filled polygons
plotly::ggplotly(p, tooltip = "text") |>
    plotly::style(hoveron = "fills")

Key Functions Summary

| Function | Purpose | Example |
|---|---|---|
| nso_itms_search() | Find tables by keyword | nso_itms_search("mortality") |
| nso_table_meta() | Get table dimensions | nso_table_meta("DT_NSO_...") |
| nso_dim_values() | List dimension values | nso_dim_values(tbl, "Region") |
| nso_table_periods() | Check time coverage | nso_table_periods(tbl) |
| nso_data() | Fetch data | nso_data(tbl, selections, labels) |
| mn_boundaries() | Get geographic boundaries | mn_boundaries(level = "ADM1") |

Best Practices

  1. Always use labels: Set labels = "en" in nso_data() for readable output
  2. Check metadata first: Use nso_table_meta() to understand dimensions before fetching
  3. Use appropriate selections: Specify dimensions by their English labels (e.g., "Total" not "0")
  4. Filter carefully: Exclude total rows (usually code "0") when analyzing subgroups
  5. Clean labels: Use trimws() to remove leading/trailing spaces from region names before joining

Common Workflows

Time Series Analysis

  1. Search for table → Check periods → Fetch years → Plot trend

Regional Comparison

  1. Search table → Get all regions → Fetch latest year → Compare rates

Spatial Epidemiology

  1. Fetch regional data → Get boundaries → Join → Create choropleth

Next Steps

Quick Reference: Common Health Tables

| Indicator | Table_ID |
|---|---|
| Infant Mortality | DT_NSO_2100_015V1 |
| Maternal Mortality | DT_NSO_2100_050V1 |
| Under-5 Mortality | DT_NSO_2100_030V2 |
| Cancer Incidence | DT_NSO_2100_012V1 |
| TB Incidence | DT_NSO_2800_026V1 |
| Communicable Diseases | DT_NSO_2100_020V2 |